# set directory
setwd("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code")

# load packages
require(ggordiplots)
library(microViz)
library("vegan")
library(phyloseqCompanion)
library(metagMisc)
library(SciViews)

# load physeq object
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/phyloseq final variables.RData")

# read in soil data
soil.data <- read.csv("soil.data.final.csv")

# load newest data
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/functional type by sample.RData")

soil.data.nobog <- soil.data[which(soil.data$Transect != "Bog"),]

soil.data.nobog$Dim1 <- c(1:95)
soil.data.nobog$Dim2 <- c(1:95)
soil.data.nobog$Dim3 <- c(1:95)
soil.data.nobog$Dim4 <- c(1:95)
soil.data.nobog$Dim5 <- c(1:95)
soil.data.nobog$ph <- c(1:95)
soil.data.nobog$slope <- c(1:95)
soil.data.nobog$carcass.load <- c(1:95)

# add plant community metrics to soil data
for (i in 1:95){
  for (j in 1:95){
    if (exploration.guild.RA$Stream[i] == soil.data.nobog$Stream[j] & exploration.guild.RA$Distance[i] == soil.data.nobog$Distance[j] & 
        exploration.guild.RA$Side[i] == soil.data.nobog$Side[j] & exploration.guild.RA$Transect[i] == soil.data.nobog$Transect[j]){
      soil.data.nobog$Dim1[j] <- exploration.guild.RA$Dim1[i];
      soil.data.nobog$Dim2[j] <- exploration.guild.RA$Dim2[i];
      soil.data.nobog$Dim3[j] <- exploration.guild.RA$Dim3[i];
      soil.data.nobog$Dim4[j] <- exploration.guild.RA$Dim4[i];
      soil.data.nobog$Dim5[j] <- exploration.guild.RA$Dim5[i];
      soil.data.nobog$ph[j] <- exploration.guild.RA$ph[i];
      soil.data.nobog$slope[j] <- exploration.guild.RA$slope[i];
      soil.data.nobog$carcass.load[j] <- exploration.guild.RA$carcass.load[i];}
  }
}


# add covariates to phyloseq object
sample_data(physeq)$organic.soil.C.N <- soil.data$organic.soil.C.N[1:96]
sample_data(physeq)$organic.soil.GWC <- soil.data$organic.soil.GWC[1:96]
sample_data(physeq)$carcass.deposition <- soil.data$carcass.deposition[1:96]
sample_data(physeq)$ph <- soil.data$ph[1:96]
sample_data(physeq)$decomposing.carcass <- soil.data$decomposing.carcass[1:96]

# transform to equal sampling depth
set.seed(28132)
physeq.r = rarefy_even_depth(physeq)

# remove bog
physeq_nobog <- subset_samples(physeq, Transect != "Bog")

# filter phyloseq object by Hansen stream
physeq_happy <- subset_samples(physeq, Stream=="Happy")
physeq_hansen <- subset_samples(physeq_nobog, Stream=="Hansen")
physeq_yako <- subset_samples(physeq, Stream=="Yako")
physeq_hansen_left <- subset_samples(physeq_hansen, Side=="SL")
physeq_hansen_left_close <- subset_samples(physeq_hansen, Side=="SL" & Distance < 7)
physeq_hansen_right <- subset_samples(physeq_hansen, Side=="SR")

############################################################
# beta diversity by distance at all streams
############################################################

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_nobog, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])
species.df <- as.data.frame(species.df)

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq)
dist.euc = dist(fungi_samples$organic.soil.C.N, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa.all = as.vector(fungi.bray)
tt.all = as.vector(dist.euc)

#new data frame with vectorized distance matrices
mat = data.frame(aa.all,tt.all)

#abundance vs temperature
p <- ggplot(mat, aes(y = aa.all, x = tt.all)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "C:N", y = "Fungal community dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) + 
  theme( axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14), 
         axis.title= element_text(size = 14), 
         axis.title.x = element_text(size = 14),
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) 
#geom_abline(intercept = 0.8693353661, slope = 0.01)
#scale_y_continuous(limits=c(0.9,1))

setwd("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing")
ggsave(plot=p, width=10, height=6, dpi=300, filename = "Fig5b.jpg")

coef(lm(aa.all ~ tt.all, data = mat))
corr <- cor.test(x=mat$aa.all, y=mat$tt.all, method = 'spearman')
corr

##########################################################
# ADONIS # 
#########################################################

# old code
df = as(sample_data(physeq_nobog), "data.frame")
d = distance(physeq_nobog, "bray")

test = adonis(fungi.bray ~ organic.soil.C.N, df)
test$aov.tab

# new code
adon.results<-adonis(species.df ~ soil.data.nobog$organic.soil.C.N + soil.data.nobog$mineral.soil.C.N + soil.data.nobog$carcass.deposition + soil.data.nobog$organic.soil.GWC + 
                       soil.data.nobog$decomposing.carcass + ln(soil.data.nobog$Distance) + soil.data.nobog$Dim1 + soil.data.nobog$Dim2 + 
                       soil.data.nobog$Dim3 + soil.data.nobog$Dim4 + soil.data.nobog$Dim5 + soil.data.nobog$ph + soil.data.nobog$carcass.load + 
                       soil.data.nobog$Stream + soil.data.nobog$Transect, 
                     method="bray",perm=999)
adon.results$aov.tab

############################################################
# beta diversity by distance at Hansen
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_hansen, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_hansen)
dist.euc = dist(fungi_samples$C.N, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa.hansen = as.vector(fungi.bray)
tt.hansen = as.vector(dist.euc)

#new data frame with vectorized distance matrices
mat.hansen = data.frame(aa.hansen,tt.hansen)

#abundance vs temperature
ggplot(mat.hansen, aes(y = aa.hansen, x = tt.hansen)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) +ylim(0.7,1)
#geom_smooth(method = "lm", alpha = .15) + scale_y_continuous(limits=c(0.8,1))

############################################################
# beta diversity by distance at Hansen LEFT side
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")
physeq_hansen_left <- subset_samples(physeq_hansen, Side=="SL")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_hansen_left, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_hansen_left)
dist.euc = dist(fungi_samples$C.N, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Hansen Left",253)

#new data frame with vectorized distance matrices
mat.hansen.left = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.hansen.left, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) + 
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) 
#geom_smooth(method = "lm", alpha = .15) + scale_y_continuous(limits=c(0.9,1))

############################################################
# beta diversity by distance at Hansen RIGHT side
############################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")
physeq_hansen_right <- subset_samples(physeq_hansen, Side=="SR")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_hansen_right, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_hansen_right)
dist.euc = dist(fungi_samples$C.N, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Hansen Right",253)

#new data frame with vectorized distance matrices
mat.hansen.right = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.hansen.right, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) +
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) 
#geom_smooth(method = "lm", alpha = .15) #+ scale_y_continuous(limits=c(0.85,1))
############################################################
# beta diversity by distance at Happy
############################################################

# filter phyloseq object by Happy stream
physeq_happy <- subset_samples(physeq, Stream=="Happy")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_happy, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_happy)
dist.euc = dist(fungi_samples$C.N, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Happy",325)

#new data frame with vectorized distance matrices
mat.happy = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.happy, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", se = FALSE) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black"))

############################################################
# beta diversity by distance at Yako
############################################################

# filter phyloseq object by Happy stream
physeq_yako <- subset_samples(physeq, Stream=="Yako")

# get species abundance matrix from phyloseq object
physeq.df <- phyloseq_to_df(physeq_yako, addtax = F, addtot = F)
species.df <- t(physeq.df[,-1])

# calculate bray curtis dissimilarity from abundance matrix
fungi.bray <- vegdist(species.df, method = "bray")

#environmental vector - euclidean distance
fungi_samples <- sample.data.frame(physeq_yako)
dist.euc = dist(fungi_samples$C.N, method = "euclidean")

#abundance vs environmental 
mantel(fungi.bray, dist.euc, method = "spearman", permutations = 9999, na.rm = TRUE)

aa = as.vector(fungi.bray)
tt = as.vector(dist.euc)
name <- rep("Yako",276)

#new data frame with vectorized distance matrices
mat.yako = data.frame(aa, tt, name)

#abundance vs temperature
ggplot(mat.yako, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", se = FALSE) +
  theme( axis.text.x = element_text(face = "bold",colour = "black", size = 12), 
         axis.text.y = element_text(face = "bold", size = 11, colour = "black"), 
         axis.title= element_text(face = "bold", size = 14, colour = "black"), 
         panel.background = element_blank(), 
         panel.border = element_rect(fill = NA, colour = "black")) + ylim(0.8,1)

# plot all together

# combine abundance dissimilarity vectors, euclidean distance vectors and labels together
all <- rbind(mat.hansen.left, mat.hansen.right, mat.happy, mat.yako)

ggplot(all, aes(y = aa, x = tt)) + 
  geom_point(size = 3, alpha = 0.5) + 
  labs(x = "Distance (m)", y = "Bray-Curtis Dissimilarity") + 
  geom_smooth(method = "lm", alpha = .15, aes(fill = name, color=name)) + 
  scale_y_continuous(limits=c(0.96,1))